Data Analysis on Electric Vehicle¶

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
In [2]:
pd.set_option('display.max_columns', None)
dt=pd.read_csv("dataset.csv")
dt.head()
Out[2]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract
0 JTMEB3FV6N Monroe Key West FL 33040 2022 TOYOTA RAV4 PRIME Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 42 0 NaN 198968248 POINT (-81.80023 24.5545) NaN 12087972100
1 1G1RD6E45D Clark Laughlin NV 89029 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 NaN 5204412 POINT (-114.57245 35.16815) NaN 32003005702
2 JN1AZ0CP8B Yakima Yakima WA 98901 2011 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 73 0 15.0 218972519 POINT (-120.50721 46.60448) PACIFICORP 53077001602
3 1G1FW6S08H Skagit Concrete WA 98237 2017 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 238 0 39.0 186750406 POINT (-121.7515 48.53892) PUGET SOUND ENERGY INC 53057951101
4 3FA6P0SU1K Snohomish Everett WA 98201 2019 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 38.0 2006714 POINT (-122.20596 47.97659) PUGET SOUND ENERGY INC 53061041500
In [3]:
dt.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112634 entries, 0 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112634 non-null  object 
 1   County                                             112634 non-null  object 
 2   City                                               112634 non-null  object 
 3   State                                              112634 non-null  object 
 4   Postal Code                                        112634 non-null  int64  
 5   Model Year                                         112634 non-null  int64  
 6   Make                                               112634 non-null  object 
 7   Model                                              112614 non-null  object 
 8   Electric Vehicle Type                              112634 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112634 non-null  object 
 10  Electric Range                                     112634 non-null  int64  
 11  Base MSRP                                          112634 non-null  int64  
 12  Legislative District                               112348 non-null  float64
 13  DOL Vehicle ID                                     112634 non-null  int64  
 14  Vehicle Location                                   112610 non-null  object 
 15  Electric Utility                                   112191 non-null  object 
 16  2020 Census Tract                                  112634 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 14.6+ MB
In [4]:
dt.isna().sum()
Out[4]:
VIN (1-10)                                             0
County                                                 0
City                                                   0
State                                                  0
Postal Code                                            0
Model Year                                             0
Make                                                   0
Model                                                 20
Electric Vehicle Type                                  0
Clean Alternative Fuel Vehicle (CAFV) Eligibility      0
Electric Range                                         0
Base MSRP                                              0
Legislative District                                 286
DOL Vehicle ID                                         0
Vehicle Location                                      24
Electric Utility                                     443
2020 Census Tract                                      0
dtype: int64
In [5]:
dt.dropna(inplace=True)
In [6]:
dt.duplicated().sum()
Out[6]:
0
In [7]:
dt.shape
Out[7]:
(112152, 17)

univariant analysis¶

In [8]:
dt.select_dtypes('number').columns
Out[8]:
Index(['Postal Code', 'Model Year', 'Electric Range', 'Base MSRP',
       'Legislative District', 'DOL Vehicle ID', '2020 Census Tract'],
      dtype='object')
In [9]:
dt['Model Year'].value_counts()
Out[9]:
2022    26455
2021    18277
2018    14190
2020    10998
2019    10216
2017     8598
2016     5709
2015     4918
2013     4669
2014     3665
2023     1863
2012     1695
2011      835
2010       24
2008       23
2000       10
1999        3
2002        2
1997        1
1998        1
Name: Model Year, dtype: int64
In [11]:
fig=px.histogram(dt,x='Model Year')
fig.update_traces(marker=dict(color='orange', line=dict(color='black', width=2)))
fig.show()

Between 2020 to 2022 more numbers of models are manufactured

In [10]:
fig=px.histogram(dt,x='Postal Code')
fig.update_traces(marker=dict(color='Green', line=dict(color='black', width=2)))
fig.show()
In [42]:
filt = dt['County'].value_counts().reset_index(name='Count').head(10)
fig = go.Figure()
fig.add_trace(go.Scatter(x=filt['index'], y=filt['Count'],mode='lines+markers',
                         marker=dict(size=10,),  
                         line=dict(color='blue', width=2),  
                         showlegend=False))
fig.update_layout(title='Car Make Frequency',
                  xaxis_title='Car Make', yaxis_title='Count')
fig.show()
In [13]:
dt.select_dtypes(include='object').columns
Out[13]:
Index(['VIN (1-10)', 'County', 'City', 'State', 'Make', 'Model',
       'Electric Vehicle Type',
       'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Vehicle Location',
       'Electric Utility'],
      dtype='object')
In [14]:
filt = dt['Make'].value_counts().head(15)
fig = px.bar(filt, y=filt.index, x=filt.values,color=filt.index,title="Frequency of Car Makes")
fig.update_layout(xaxis_title='Car Make', yaxis_title='Count',xaxis_tickangle=0)
fig.show()
In [15]:
filt=dt['County'].value_counts().head(10)
fig=px.bar(filt,y='County',color=filt.index)
fig.update_layout(xaxis_title='County', yaxis_title='count',title='Frequency of County')
fig.show()
In [40]:
filt = dt.groupby(['County', 'City']).size().reset_index(name='Count')

fig = px.treemap(
    filt,
    path=['County', 'City'],
    values='Count', 
    title="Car Counts by County and City",
    color='Count',
    color_continuous_scale='Blues')

fig.show()
/Users/koushik/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

/Users/koushik/opt/anaconda3/lib/python3.9/site-packages/plotly/express/_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

In [17]:
filt=dt['Electric Vehicle Type'].value_counts().reset_index(name='Count')
fig=px.pie(filt,names='index',values='Count',hole=0.4)
fig.update_layout(title="Electric Vehicle Type")
fig.show()
In [18]:
filt=dt['City'].value_counts().reset_index(name='Count').head(15)
fig=px.pie(filt,names='index',values='Count',color_discrete_sequence=px.colors.qualitative.Plotly)
fig.update_layout(title="Types of City")
fig.show()
In [19]:
filt = dt['Model Year'].value_counts().reset_index(name='Count').head(10)
fig = go.Figure()
fig.add_trace(go.Scatter(x=filt['index'], y=filt['Count'],mode='lines+markers',
                         marker=dict(size=10,),  
                         line=dict(color='blue', width=2),  
                         showlegend=False))
fig.update_layout(title='Model Year',xaxis_title='Model Year', yaxis_title='Count')
fig.show()
In [20]:
dt.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 112152 entries, 2 to 112633
Data columns (total 17 columns):
 #   Column                                             Non-Null Count   Dtype  
---  ------                                             --------------   -----  
 0   VIN (1-10)                                         112152 non-null  object 
 1   County                                             112152 non-null  object 
 2   City                                               112152 non-null  object 
 3   State                                              112152 non-null  object 
 4   Postal Code                                        112152 non-null  int64  
 5   Model Year                                         112152 non-null  int64  
 6   Make                                               112152 non-null  object 
 7   Model                                              112152 non-null  object 
 8   Electric Vehicle Type                              112152 non-null  object 
 9   Clean Alternative Fuel Vehicle (CAFV) Eligibility  112152 non-null  object 
 10  Electric Range                                     112152 non-null  int64  
 11  Base MSRP                                          112152 non-null  int64  
 12  Legislative District                               112152 non-null  float64
 13  DOL Vehicle ID                                     112152 non-null  int64  
 14  Vehicle Location                                   112152 non-null  object 
 15  Electric Utility                                   112152 non-null  object 
 16  2020 Census Tract                                  112152 non-null  int64  
dtypes: float64(1), int64(6), object(10)
memory usage: 15.4+ MB

bivariant¶

In [22]:
c = dt.groupby(['Model Year', 'Make']).size().reset_index(name='Count')
top_c = c.sort_values(by='Count', ascending=False).head(50)
pivot_data = top_c.pivot(index='Model Year', columns='Make', values='Count').fillna(0)
fig = go.Figure()
for make in pivot_data.columns:
    fig.add_trace(go.Scatter(x=pivot_data.index, y=pivot_data[make], mode='lines+markers',
                             name=make, marker=dict(size=8), line=dict(width=2)))
fig.update_layout(title='Top Car Makes by Model Year',xaxis_title='Model Year',yaxis_title='Count',hovermode='x')

fig.show()
In [23]:
fig_box_ev_type_range = px.box(dt, x='Electric Vehicle Type', y='Electric Range')
fig_box_ev_type_range.show()
In [24]:
df_counts = dt.groupby(['Model Year', 'Make']).size().reset_index(name='Count').tail(100)
fig=px.bar(df_counts, x='Model Year', y='Count', color='Make',
                 title='Count of Different Electric Vehicles by Model Year')
fig.show()
In [25]:
fig = px.scatter(dt, x='Model Year', y='Electric Range',color='Clean Alternative Fuel Vehicle (CAFV) Eligibility',title='Electric Range by Car Make',
                 labels={'Electric Range': 'Electric Range', 'Make': 'Car Make'},
                 hover_data=['Make', 'Electric Range'])

fig.show()
In [36]:
avg = dt.groupby('Make')['Electric Range'].mean().reset_index()
fig = go.Figure()
fig.add_trace(go.Scatter(x=avg['Make'], y=avg['Electric Range'],
                         mode='lines+markers',
                         marker=dict(size=10, color='blue'),
                         line=dict(color='blue', width=2),
                         name='Electric Range'))
fig.update_layout(title='Electric Range by Car Make',
                  xaxis_title='Car Make',
                  yaxis_title='Electric Range',
                  yaxis=dict(range=[0, max(avg['Electric Range']) + 50]),
                  hovermode='closest')
fig.show()
In [45]:
dt.head()
Out[45]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract Longitude Latitude
2 JN1AZ0CP8B Yakima Yakima WA 98901 2011 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 73 0 15.0 218972519 POINT (-120.50721 46.60448) PACIFICORP 53077001602 -120.50721 46.60448
3 1G1FW6S08H Skagit Concrete WA 98237 2017 CHEVROLET BOLT EV Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 238 0 39.0 186750406 POINT (-121.7515 48.53892) PUGET SOUND ENERGY INC 53057951101 -121.75150 48.53892
4 3FA6P0SU1K Snohomish Everett WA 98201 2019 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 26 0 38.0 2006714 POINT (-122.20596 47.97659) PUGET SOUND ENERGY INC 53061041500 -122.20596 47.97659
5 5YJ3E1EB5J Snohomish Bothell WA 98021 2018 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 215 0 1.0 475635324 POINT (-122.18384 47.8031) PUGET SOUND ENERGY INC 53061051916 -122.18384 47.80310
6 1N4AZ0CP4D Snohomish Everett WA 98203 2013 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 75 0 38.0 253546023 POINT (-122.23019 47.94949) PUGET SOUND ENERGY INC 53061040900 -122.23019 47.94949
In [27]:
fig_density = px.density_contour(
    dt,
    x='Base MSRP', 
    y='Electric Range',
    color='Clean Alternative Fuel Vehicle (CAFV) Eligibility', 
    title='2D Density Plot: Electric Range vs. Base MSRP',
    labels={'Base MSRP': 'Base MSRP ($)', 'Electric Range': 'Electric Range (miles)'},
    height=600, 
)
fig_density.update_layout(
    xaxis_title='Base MSRP ($)',
    yaxis_title='Electric Range (miles)',
)

fig_density.show()
In [28]:
c=dt.groupby('Make')['Model'].value_counts().to_frame()
c.head(30)
Out[28]:
Model
Make Model
AUDI E-TRON 795
A3 572
Q5 E 478
E-TRON SPORTBACK 171
Q5 137
E-TRON GT 75
Q4 62
RS E-TRON GT 16
A7 11
A8 E 3
AZURE DYNAMICS TRANSIT CONNECT ELECTRIC 7
BENTLEY BENTAYGA 2
FLYING SPUR 1
BMW I3 1888
X5 1402
530E 323
330E 299
X3 292
IX 162
I4 154
I8 102
740E 29
745E 7
745LE 2
CADILLAC ELR 76
CT6 32
CHEVROLET BOLT EV 4895
VOLT 4870
SPARK 248
BOLT EUV 126
In [29]:
c = dt.groupby('Make')['Model'].value_counts().to_frame(name='Count')
c = c.reset_index()
top_50 = c.head(60)
fig = px.bar(top_50, x='Model', y='Count', color='Make',
             title='Top 50 Make-Model Combinations',
             labels={'Model': 'Car Model', 'Count': 'Frequency'},
             hover_data=['Make'])
fig.update_layout(xaxis_tickangle=90)
fig.show()
In [44]:
make_counts = dt['Make'].value_counts().reset_index()
make_counts.columns = ['Make', 'Count']

top_makes = make_counts.head(15)
fig_pie = px.pie(top_makes, 
                 names='Make', 
                 values='Count', 
                 title='Top mode EV Makes by Frequency',
                 labels={'Make': 'Car Make', 'Count': 'Frequency'})

fig_pie.show()

Choropleth¶

In [31]:
ev_count_by_state = dt.groupby('State').size().reset_index(name='Number_of_EV_Vehicles')
ev_count_by_state
fig = px.choropleth(ev_count_by_state,
                    locations='State',
                    locationmode="USA-states",
                    color='Number_of_EV_Vehicles',
                    scope="usa",
                    color_continuous_scale="Viridis",
                    labels={'Number_of_EV_Vehicles': 'EV Vehicles'},
                    title='Number of EV Vehicles by State')


fig.show()
In [46]:
dt['Longitude'] = dt['Vehicle Location'].apply(lambda loc: float(loc.split()[1][1:]))
dt['Latitude'] = dt['Vehicle Location'].apply(lambda loc: float(loc.split()[2][:-1]))

location_counts = dt.groupby(['Latitude', 'Longitude', 'Postal Code', 'County', "State"]).size().reset_index(name='EV Count')
fig_scatter_map = px.scatter_mapbox(location_counts,
                                    lat='Latitude',
                                    lon='Longitude',
                                    color='EV Count',
                                    size='EV Count',
                                    mapbox_style='carto-positron',
                                    zoom=3,
                                    center={'lat': 37.0902, 'lon': -95.7129},
                                    title='Map of Electric Vehicle Locations')

fig_scatter_map.show()

Racing Bar Plot¶

In [33]:
ev_make_by_year = dt.groupby(['Make', 'Model Year']).size().reset_index(name='Number of Vehicles')
ev_make_by_year.head()
Out[33]:
Make Model Year Number of Vehicles
0 AUDI 2016 214
1 AUDI 2017 185
2 AUDI 2018 173
3 AUDI 2019 387
4 AUDI 2020 224
In [34]:
fig = px.bar(ev_make_by_year,
             y='Make',
             x='Number of Vehicles', 
             color='Make', 
             animation_frame='Model Year', 
             orientation='h',
             title='EV Makes and their Count Over the Years',
             labels={'Number of Vehicles': 'Number of EV Vehicles'},
             range_x=[0, 3000]
            )

fig.update_traces(texttemplate='%{x}',
                  textposition='outside',
                  textfont_size=16)
fig.update_layout(
    xaxis=dict(showgrid=True, gridcolor='LightGray'),
    yaxis_title='EV Makes',
    xaxis_title='Number of EV Vehicles',
    showlegend=False, 
    title_x=0.5, 
    title_font=dict(size=20),
    margin=dict(l=50, r=50, t=50, b=50),
    width=800,
    height=600)


fig.show()
In [ ]: